
************************************************************
*  Regressions for Table 2b
************************************************************

*  For 1926, includes large regions of Siberia, Far East, Uralskaya, North Cauc. rather than small okrugs in these

* Start with no data in memory (this line is still newline-delimited)
clear
* From the next line on, every command and comment ends at a semicolon, not at a line break
# delimit ;
*  Interpret the file under Stata 11 rules (the regressions below use i. and # factor-variable notation);
version 11;
*  Close a log left open by an earlier run (capture suppresses the error when none is open);
capture log close;
*  NOTE(review): legacy numeric form of set more, presumably the same as set more off - confirm;
set more 1;


************************************************************;
*1926 Census data and regressions                            ;
************************************************************;

*  1926 Census data;
*  Loads census1926 into the memory cleared at the top of the file (presumably holds the
   single-year popm and popf counts and the age-group variables used below - confirm against the dataset);
use census1926;

*  Sex-ratio numerators: male population counts summed over a 17-year window of single ages;
*  For each base age 15, 20, ..., 40 the window runs from base-2 through base+14;
*  Creates numu (urban), numr (rural) and num (urban plus rural) for every base age;
capture program drop numerator;
program define numerator;
        forvalues base = 15(5)40 { ;
                foreach area in u r { ;
                        *  Start with the base age, then append base-2, base-1, base+1, ..., base+14;
                        local total popm`area'`base';
                        foreach shift of numlist -2 -1 1/14 { ;
                                local age = `base' + `shift';
                                local total `total'+popm`area'`age';
                        } ;
                        gen num`area'`base'=`total';
                } ;
                gen num`base'=numu`base'+numr`base';
        } ;
end;

*  Sex-ratio denominators: female population counts summed over the same 17-year window;
*  For each base age 15, 20, ..., 40 the window runs from base-2 through base+14;
*  Creates denu (urban), denr (rural) and den (urban plus rural) for every base age;
capture program drop denominator;
program define denominator;
        forvalues base = 15(5)40 { ;
                foreach area in u r { ;
                        *  Start with the base age, then append base-2, base-1, base+1, ..., base+14;
                        local total popf`area'`base';
                        foreach shift of numlist -2 -1 1/14 { ;
                                local age = `base' + `shift';
                                local total `total'+popf`area'`age';
                        } ;
                        gen den`area'`base'=`total';
                } ;
                gen den`base'=denu`base'+denr`base';
        } ;
end;

*  Build the male and female sums for base ages 15(5)40;
numerator;
denominator;

*  Sex ratio = male sum / female sum, for urban (u), rural (r) and all (a);
forvalues base = 15(5)40 { ;
        foreach area in u r { ;
                gen sr10`area'`base'=num`area'`base'/den`area'`base';
        } ;
        gen sr10a`base'=num`base'/den`base';
} ;


*  Remove the intermediate sums before building the 18-19 group;
drop num* den*;

*  Age group 18-19: males and females summed over single ages 16 through 29;
foreach area in u r { ;
        local males popm`area'16;
        local females popf`area'16;
        forvalues age = 17/29 { ;
                local males `males'+popm`area'`age';
                local females `females'+popf`area'`age';
        } ;
        gen num`area'1819=`males';
        gen den`area'1819=`females';
} ;

foreach area in u r { ;
        gen sr10`area'18=num`area'1819/den`area'1819;
} ;
gen sr10a18=(numu1819+numr1819)/(denu1819+denr1819);


*  Stacking data by age group;
*  One variable group per age band, in the order 18-19, 20-24, ..., 40-44.
   The sex-ratio variables carry only the lower age of the band (18, 20, ..., 40);
local stackvars "";
foreach band in 1819 2024 2529 3034 3539 4044 { ;
        local lo = substr("`band'", 1, 2);
        local stackvars `stackvars'
                region regno lgreg
                popf`band' popfu`band' popfr`band' popm`band' popmu`band' popmr`band'
                pmarf`band' pmarfu`band' pmarfr`band' pmarm`band' pmarmu`band' pmarmr`band'
                sr10a`lo' sr10u`lo' sr10r`lo';
} ;

stack `stackvars' ,
        into(region regno lgreg
        popf popfu popfr popm popmu popmr
        pmarf pmarfu pmarfr pmarm pmarmu pmarmr
        sr10a sr10u sr10r ) clear;

*  _stack numbers the groups 1 to 6 in the order listed above;
ren _stack agegroup;
sort agegroup;

log using table2b.log, replace;
*  Dropping Krymskaya ASSR since have no population totals for this region;
drop if regno==1101;
*  Dropping large regions;
drop if inlist(regno, 69, 94, 105, 107, 142);

quietly tab regno, gen(reg);
quietly tab agegroup, gen(agegroup);

*  Regions 7 and 16: the all-population variables take the urban values;
local urbonly inlist(regno, 7, 16);
foreach v in pmarf pmarm popm popf { ;
        replace `v'=`v'u if `urbonly';
} ;
replace sr10a=sr10u if `urbonly';

*Rural data only;
local ruronly inlist(regno, 57, 131, 34, 45, 136, 137, 138);
foreach v in pmarf pmarm popm popf { ;
        replace `v'=`v'r if `ruronly';
} ;
replace sr10a=sr10r if `ruronly';

*  Log population sizes;
foreach v in popf popfu popfr popm popmu popmr { ;
        gen ln`v'=log(`v');
} ;

*  Urban share of each sex, set to zero for the rural-only regions;
gen urbshf=popfu/popf;
gen urbshm=popmu/popm;
foreach share in urbshf urbshm { ;
        replace `share'=0 if `ruronly';
} ;

gen pop=popm+popf;
gen popu=popmu+popfu;
gen popr=popmr+popfr;
replace popu=0 if `ruronly';

tab agegroup;

*  Linear time index: 0, 5, ..., 25 for age groups 1 to 6;
gen time=5*(agegroup-1);


*  Age 18-44;
*  Dropping very small cells (fewer than 100 women or fewer than 100 men);
foreach cell in popf popm { ;
        drop if `cell' < 100;
} ;

*  1926 Census regressions;
*  Terms shared by every specification: age-group effects, region effects and
   region-specific linear trends in time;
local fe i.agegroup i.regno i.regno#c.time;

*  Women, all population;
regress pmarf sr10a lnpopm urbshf `fe', cluster(regno);

*  Men, all population;
regress pmarm sr10a lnpopf urbshm `fe', cluster(regno);

*  NOTE(review): the drops below are cumulative and never restored, so the rural regressions
   also exclude cells with fewer than 100 urban women or men - confirm this is intended;
foreach cell in popfu popmu { ;
        drop if `cell' < 100;
} ;

*  Women, urban population;
regress pmarfu sr10u lnpopmu `fe', cluster(regno);

* Men, urban population;
regress pmarmu sr10u lnpopfu `fe', cluster(regno);

foreach cell in popfr popmr { ;
        drop if `cell' < 100;
} ;

*  Women, rural population;
regress pmarfr sr10r lnpopmr `fe', cluster(regno);

*  Men, rural population;
regress pmarmr sr10r lnpopfr `fe', cluster(regno);

log close;
clear;



************************************************************;
*1959 Census data and regressions                            ;
************************************************************;

*  Load the 1959 data and attach the sex ratios, one row per regno on both sides;
use data_5yr;
sort regno;
merge 1:1 regno using sexratios_5yr;
*  Stop unless every region was matched in both files;
assert _merge==3;
drop _merge;
sort regno;

* Dropping small regions that are part of other regions;
drop if inlist(regno, 4, 45, 48, 57, 60, 67, 68, 73, 74, 76, 78, 80, 81, 86);

*  Stacking data by age group;
*  One variable group per age band, in the order 18, 20-24, 25-29, ..., 40-44.
   The sex-ratio variables carry only the lower age of the band, and the
   macro-region dummies are repeated unchanged in every group;
local regdummies cenchern central esib fareast ncauc north norwest povolzh urals volga wsib;
local stackvars "";
foreach band in 18 2024 2529 3034 3539 4044 { ;
        local lo = substr("`band'", 1, 2);
        local stackvars `stackvars'
                region regno
                marrf`band'59 marrfu`band'59 marrfr`band'59 marrm`band'59 marrmu`band'59 marrmr`band'59
                `regdummies'
                popf`band'59 popfu`band'59 popfr`band'59 popm`band'59 popmu`band'59 popmr`band'59
                sr10a`lo' sr10u`lo' sr10r`lo'
                urbshm`band'59 urbshf`band'59;
} ;

stack `stackvars',
        into(region regno
        marrf59 marrfu59 marrfr59 marrm59 marrmu59 marrmr59
        `regdummies'
        popf59 popfu59 popfr59 popm59 popmu59 popmr59
        sr10a sr10u sr10r
        urbshm59 urbshf59) clear;

*  Log population sizes of the stacked 1959 cells;
foreach grp in m mu mr f fu fr { ;
        gen lnpop`grp'=log(pop`grp'59);
} ;

*  _stack numbers the six stacked age groups 1 to 6;
ren _stack agegroup;
sort agegroup;

*  Regions 7 and 16: the all-population sex ratio takes the urban value;
replace sr10a=sr10u if regno==7 | regno==16;

*  Strip the 59 suffix from the stacked variables.
   urbshf59 and urbshm59 are renamed as well: the regressions refer to urbshf and urbshm,
   which previously resolved only through variable-name abbreviation and would fail
   under set varabbrev off;
foreach stub in marrf marrfu marrfr marrm marrmu marrmr
                popf popfu popfr popm popmu popmr
                urbshf urbshm { ;
        ren `stub'59 `stub';
} ;

quietly tab regno, gen(reg);
quietly tab agegroup, gen(agegroup);

*  Macro-region code built from the region dummies, 0 when none is set.
   Later lines override earlier ones, so the order is kept as is;
gen lgreg=0;
replace lgreg=1 if north==1;
replace lgreg=2 if norwest==1;
replace lgreg=3 if central==1;
replace lgreg=3 if regno==16;
replace lgreg=4 if cenchern==1;
replace lgreg=5 if volga==1;
replace lgreg=6 if povolzh==1;
replace lgreg=7 if ncauc==1;
replace lgreg=8 if urals==1;
replace lgreg=9 if esib==1;
replace lgreg=10 if wsib==1;
replace lgreg=11 if fareast==1;

*  Linear time index 0, 1, ..., 5 for age groups 1 to 6 (only six groups are stacked,
   so the former agegroup==7 branch could never apply);
*  NOTE(review): the 1926 section codes time in steps of 5 instead of 1, which should only
   rescale the region-trend coefficients - confirm the difference is intended;
gen time=agegroup-1;

log using table2b.log, append;

*  1959 Census regressions;
*  Terms shared by every specification: age-group effects, region effects and
   region-specific linear trends in time;
local fe i.agegroup i.regno i.regno#c.time;

*  Women;
regress marrf sr10a lnpopm urbshf `fe', cluster(regno);

*  Urban;
regress marrfu sr10u lnpopmu `fe', cluster(regno);

*  Rural;
regress marrfr sr10r lnpopmr `fe', cluster(regno);

*  Men;
regress marrm sr10a lnpopf urbshm `fe', cluster(regno);

*  Urban;
regress marrmu sr10u lnpopfu `fe', cluster(regno);

*  Rural;
regress marrmr sr10r lnpopfr `fe', cluster(regno);


log close;



